Load in necessary packages

library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(mapview)
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
library(leaflet)
library(lehdr)
library(fs)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Methodology

In this file I will demostrate the use of st_intersection() function to split PUMS data down to the blockgroup scale. In doing this the area of each PUMA is split into each block group and each block group assigned a fraction of total PUMA area. Assuming uniform distribution of the demographic quantity (e.g. number of workers) over the area of the PUMA, we assume that each block group takes a share of this quantity proportional to its fraction of PUMA area.

Practice PUMA x Blockgroup Intersect

#Load in PUMAS
scc_pumas <- pumas("CA", cb=F, progress_bar=F) %>%
  filter(grepl("Santa Clara County", NAMELSAD10)) #06 = CA, 085 = Santa Clara
scc_pumas_list <- scc_pumas %>% 
  pull(PUMACE10)

#Load in blockgroups
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
mapview(scc_blockgroups)
scc_bg_list <- scc_blockgroups %>% 
  pull(GEOID)

#Get water area
water <-
  "Santa Clara County" %>%
  map_dfr(function(county){
    area_water("CA", county, progress_bar = FALSE) %>% as.data.frame()
  }) %>%
  st_as_sf() %>%
  st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
  )) %>%
  st_union() %>%
  as_Spatial() %>%
  sp::disaggregate() %>%
  st_as_sf() %>%
  st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
  )) %>%
  mutate(area = st_area(.) %>% as.numeric()) %>%
  filter(area == max(area)) %>%
  dplyr::select(-area)
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
scc_pumas_fixed <-  scc_pumas %>% 
  st_difference(water)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
scc_blockgroups_fixed <- scc_blockgroups %>% 
  st_difference(water)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries

Let’s read in Bay PUMA information and join Santa Clara PUMS data to our Santa Clara PUMAS

#Load in Santa Clara County Puma data
scc_puma_data <- readRDS(file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds") %>% #PUMA = Public Use Microdata Area
  filter(PUMA %in% scc_pumas_list) %>% 
  select("PUMA","SOCP","PWGTP")

scc_puma_data_weighted <- data.frame(PUMA = rep(scc_puma_data$PUMA, times = scc_puma_data$PWGTP), SOCP = rep(scc_puma_data$SOCP, times = scc_puma_data$PWGTP)) 

#Load in SOC data
soc_frac <- readRDS(file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/soc_fraction")

#Join SOC data to bay_puma_data for Santa Clara County only
scc_puma_data_soc <- scc_puma_data_weighted %>%
  left_join(soc_frac, by = c("SOCP" = "SOC Occupation")) 
## Warning: Column `SOCP`/`SOC Occupation` joining factor and character vector,
## coercing into character vector
# head(soc_essential_weighted_frac)

scc_puma_laborforce <- scc_puma_data_soc %>% 
 filter(!is.na(SOCP)) %>% #Remove those not in work force
  group_by(PUMA) %>% 
  summarize(fracEssential = round(mean(fracEssential)*100, digits =1),population = n())  
# 
# head(scc_puma_data_soc)

Visualize % In Labor Force By PUMA

#Now we'll visualize by PUMA
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(scc_puma_laborforce %>% 
        pull(fracEssential) %>% 
        unique())
)
puma_laborforce <- leaflet(data = scc_puma_laborforce) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      scc_puma_laborforce %>% 
      left_join(scc_pumas_fixed, by = c("PUMA" = "PUMACE10")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"%"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Essential Workers" ) 
## Warning: Column `PUMA`/`PUMACE10` joining factor and character vector, coercing
## into character vector
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
puma_laborforce

Let’s split use st_intersection to split PUMA data into blockgroups

1. Give each PUMA an area

2. Use st_intersect to split PUMA into blockgroup

3. Calculate area of each blockgroup

scc_pumas_w_area <- scc_pumas_fixed %>% 
  mutate(puma_area = st_area(.))

#Overlap PUMA1 and Blockgroup
mapview(scc_pumas_fixed, color = "red")+
  mapview(scc_blockgroups_fixed, color = "blue")
#Try just with PUMA 1
bg_puma1 <- scc_pumas_w_area %>% 
  filter(PUMACE10 == "08501") %>% 
  st_intersection(scc_blockgroups_fixed) %>% 
  mutate(bg_area = st_area(.)) %>% 
  mutate(percent_area_puma = bg_area/puma_area) 
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
bg_puma1 %>% mapview()
bg_puma1 %>% 
  summarize(total = sum(percent_area_puma)) # Equals 1
## Simple feature collection with 1 feature and 1 field
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -122.2027 ymin: 37.20049 xmax: -122.0286 ymax: 37.46557
## CRS:            4269
##           total                       geometry
## 1 0.9999996 [1] MULTIPOLYGON (((-122.044 37...
#PUMA intersect all SOCs
bg_puma <- scc_pumas_w_area %>% 
  st_intersection(scc_blockgroups_fixed) %>% 
  mutate(bg_area = st_area(.)) %>% 
  mutate(percent_area_puma = bg_area/puma_area) 
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
bg_puma <- bg_puma %>% 
  mutate(`bg_area` = as.double(bg_area)) %>% 
  filter(bg_area > 0)
# 
# head(bg_puma)

Plot PUMA data in blockgroups

For this we must join every PUMA observation to each blockgroup in a PUMA

Then we’ll multiply these values by the percent_area_puma

bg_puma_laborforce <- bg_puma %>% 
  left_join(scc_puma_laborforce , by = c("PUMACE10" = "PUMA")) %>% 
         as_tibble() %>% 
  select(PUMACE10, GEOID,fracEssential) %>% 
  mutate(fracEssential = as.numeric(fracEssential))
## Warning: Column `PUMACE10`/`PUMA` joining character vector and factor, coercing
## into character vector
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = c(bg_puma_laborforce %>% 
               pull(fracEssential)
  )
)

bg_laborforce <- leaflet(data = bg_puma_laborforce) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = bg_puma_laborforce %>% 
      left_join(scc_blockgroups_fixed) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"%"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Essential of Labor Force" ) 
## Joining, by = "GEOID"
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
bg_laborforce

Social Distancing

data_dir <- "/Users/spencerrobinson/"

bg_socialdistancing <- readRDS(file = str_c(data_dir,"pCloud Drive/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds")) %>% 
   filter(origin_census_block_group %in% scc_bg_list) %>% #filter just santa clara county
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date()) 

#put date in correct format
shelter_start <- "2020-03-16" %>% as.Date()
  
bg_week_socialdistancing <- bg_socialdistancing %>% 
  filter(date > shelter_start & date <= "2020-03-30") %>% #First two weeks of shelter in place
  arrange(date) %>% 
  mutate(week = ifelse((date %>% as.numeric()) %% 7 %in% c(2,3), F, T)) %>% 
filter(week == TRUE) %>% 
  group_by(origin_census_block_group) %>% 
  summarize(
    completely_home_device_count = sum(completely_home_device_count),
    device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), 
         `% not completely at home` = (100 - `% Completely at Home`))
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(bg_week_socialdistancing %>% 
        pull(`% Completely at Home`) %>% 
        unique())
)

scc_socialdistancing_week_map <- leaflet(data = bg_week_socialdistancing) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      bg_week_socialdistancing %>% 
      left_join(scc_blockgroups_fixed, by = c("origin_census_block_group" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(`% Completely at Home`),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(`% Completely at Home`,"%"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = blue_pal, values = ~`% Completely at Home`, opacity = 1, title = "% Completely at Home (Blockgroup)" ) 
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
scc_socialdistancing_week_map

Correlation of Social Distancing (Week) and fracEssential at PUMA level

scc_sd_soc <- bg_week_socialdistancing %>% 
  left_join(bg_puma_laborforce, by = c("origin_census_block_group" = "GEOID"))

scc_sd_soc %>%
  ggplot(aes(
    x = `fracEssential`,
    y = `% Completely at Home`
  )) + 
  geom_point() + 
  geom_smooth(method=lm) + 
  labs(
    x = "% of Essential Workers",
    y = "Percent devices completely at home 03-16 to 03-30",
    title = "Bay Area Jose: Social Distancing and Essential Worker % (SOC Codes)"
  )
## `geom_smooth()` using formula 'y ~ x'

lm_model <-scc_sd_soc %>% 
  lm(`% Completely at Home` ~ `fracEssential`, data = .) %>% 
  summary()
lm_model
## 
## Call:
## lm(formula = `% Completely at Home` ~ fracEssential, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.097  -4.754   0.787   6.599  25.292 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   76.91315    5.16530  14.890  < 2e-16 ***
## fracEssential -0.46296    0.08473  -5.464 5.78e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.11 on 1074 degrees of freedom
## Multiple R-squared:  0.02705,    Adjusted R-squared:  0.02614 
## F-statistic: 29.86 on 1 and 1074 DF,  p-value: 5.782e-08

```